Geocoded individual transaction data
GIS shapefiles
Note that all longitudes and latitudes should be in WGS1984 format
#pragma nodebook off
#Use nodebook for better reproducibility https://github.com/uoa-eResearch/nodebook
%reload_ext nodebook.ipython
%nodebook disk phase3
# load libraries
import geopandas as gpd # vector data
import pandas as pd # tabular data, loading CSVs
import numpy as np # numeric data
from util import *
import matplotlib # plotting
import contextily as ctx # Used for contextual basemaps
from matplotlib_scalebar.scalebar import ScaleBar # scalebar for plot
import matplotlib.pyplot as plt # plotting
from tqdm.auto import tqdm # progress bars
tqdm.pandas()
import json
from shapely.geometry import Point, shape, LineString, MultiLineString, GeometryCollection, MultiPoint, Polygon # creating points
import requests # web requests
from pandarallel import pandarallel # parallel operations for speed
pandarallel.initialize(progress_bar=True)
plt.rcParams['figure.figsize'] = (20, 20)
INFO: Pandarallel will run on 16 workers. INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
ls()
| name | filesize (MB) | last modified | |
|---|---|---|---|
| 0 | 2013-mb-dataset-Total-New-Zealand-Household.csv | 37.12 | 2014-06-04 10:56:30.000000 |
| 1 | 2013-mb-dataset-Total-New-Zealand-Individual-Part-1.csv | 31.66 | 2014-04-01 10:13:15.197000 |
| 2 | 2013_census_mean_income_by_AU2013.xlsx | 0.59 | 2021-09-07 06:21:24.000000 |
| 3 | 2018-census-electoral-population-meshblock-2020-data.csv | 5.60 | 2021-08-09 00:39:18.000000 |
| 4 | 2018_census_dwellings_by_SA2.xlsx | 0.43 | 2021-07-12 14:30:28.000000 |
| 5 | AC_Special_Housing_Area.zip | 0.29 | 2021-09-02 10:36:16.760000 |
| 6 | AucklandArea.gpkg | 0.09 | 2021-09-02 10:36:17.220000 |
| 7 | Geocoordinates_Direct_Transit_stops_AKL.xlsx | 0.01 | 2020-10-08 07:52:29.000000 |
| 8 | Individual_part1_totalNZ-wide_format_updated_16-7-20.csv | 36.58 | 2020-07-14 16:12:24.000000 |
| 9 | MASTER_UP_BaseZone_SHP.zip | 66.92 | 2021-07-19 02:23:51.137347 |
| 10 | Modified_Community_Boards_SHP.zip | 1.30 | 2021-07-19 02:16:07.650000 |
| 11 | area-unit-2013.gdb.zip | 14.21 | 2021-09-02 10:36:16.070000 |
| 12 | kx-nz-state-highway-on-ramps-off-ramps-SHP.zip | 0.14 | 2021-08-25 09:52:06.341291 |
| 13 | lds-nz-coastline-mean-high-water-FGDB.zip | 4.88 | 2021-08-31 16:25:16.250000 |
| 14 | lds-nz-coastlines-and-islands-polygons-topo-150k-FGDB.zip | 4.25 | 2021-08-05 15:57:49.020000 |
| 15 | lds-nz-primary-parcels-CLIPPED-4326.gpkg | 273.35 | 2021-09-02 10:37:32.410000 |
| 16 | lds-nz-primary-parcels-FGDB.zip | 79.03 | 2021-08-12 09:13:27.910000 |
| 17 | lds-nz-railway-centrelines-topo-150k-SHP.zip | 0.87 | 2021-08-31 10:01:46.315326 |
| 18 | lds-nz-road-centrelines-topo-150k-FGDB.zip | 36.06 | 2021-08-31 09:43:07.931704 |
| 19 | lds-nz-street-address-GPKG-CLIPPED.gpkg | 170.23 | 2021-09-02 10:37:19.490000 |
| 20 | lds-nz-street-address-GPKG.zip | 200.88 | 2021-09-02 10:37:26.070000 |
| 21 | meshblock-2013.gdb.zip | 106.57 | 2021-09-02 10:36:59.860000 |
| 22 | meshblock-2018-clipped-generalised.gdb.zip | 32.83 | 2021-09-02 10:36:26.010000 |
| 23 | nz-primary-parcels.gdb.zip | 55.00 | 2021-09-02 10:36:39.020000 |
| 24 | statsnzarea-unit-2013-FGDB.zip | 13.79 | 2021-08-31 16:56:17.150000 |
| 25 | statsnzmeshblock-higher-geographies-2018-generalised-FGDB.zip | 34.54 | 2021-08-05 14:53:54.350000 |
| 26 | statsnzpopulation-by-meshblock-2013-census-FGDB.zip | 82.11 | 2021-07-19 13:53:55.150631 |
| 27 | statsnzstatistical-area-2-higher-geographies-2018-clipped-generalis-FGDB.zip | 10.72 | 2021-08-30 17:12:55.591895 |
Total: 1300.0MB
ls("restricted")
| name | filesize (MB) | last modified | |
|---|---|---|---|
| 0 | QPIDs_Auckland_1990_2020.csv | 17.40 | 2021-07-07 13:09:39.000 |
| 1 | QPIDs_Auckland_1990_2020_augmented.csv | 205.58 | 2021-09-07 12:36:15.880 |
Total: 223.0MB
df = pd.read_csv("restricted/QPIDs_Auckland_1990_2020.csv")
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.CL_Longitude, df.CL_Latitude), crs="EPSG:4326")
df = df.set_index("CL_QPID_output2")
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | |
|---|---|---|---|---|
| CL_QPID_output2 | ||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) |
| ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) |
383272 rows × 4 columns
%%time
parcels = gpd.read_file('input/lds-nz-primary-parcels-FGDB.zip!nz-primary-parcels.gdb')
parcels = parcels.to_crs(df.crs)
parcels = parcels.set_index("id")
parcels["parcel_geometry"] = parcels.geometry
CPU times: user 39.2 s, sys: 1.45 s, total: 40.7 s Wall time: 41.4 s
parcels
| appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| 5266447 | None | None | Hydro | Primary | None | North Auckland | None | NaN | 1289690.0 | MULTIPOLYGON (((174.46092 -36.26249, 174.46083... | MULTIPOLYGON (((174.46092 -36.26249, 174.46083... |
| 4789727 | Part Lot 3 Allot 64 Section 1 SBRS OF Auckland | SO 663 | DCDB | Primary | None | North Auckland | None | NaN | 1.0 | MULTIPOLYGON (((174.77836 -36.85187, 174.77838... | MULTIPOLYGON (((174.77836 -36.85187, 174.77838... |
| 4810316 | Part Tidal Lands of Manukau Harbour Survey Off... | SO 67474 | DCDB | Primary | [Create] Revested in the Crown Sec 5 Foreshore... | North Auckland | None | 31600000.0 | 31342451.0 | MULTIPOLYGON (((174.68836 -37.01631, 174.68898... | MULTIPOLYGON (((174.68836 -37.01631, 174.68898... |
| 4817943 | Crown Land Survey Office Plan 58175 | SO 58175 | DCDB | Primary | [Create] Crown Land Reserved from Sale Sec 58 ... | North Auckland | None | NaN | 467490.0 | MULTIPOLYGON (((174.33091 -36.32797, 174.33059... | MULTIPOLYGON (((174.33091 -36.32797, 174.33059... |
| 4827816 | Lot 2 DP 165098 | DP 165098 | DCDB | Primary | None | North Auckland | NA99B/977 | 22979.0 | 22972.0 | MULTIPOLYGON (((174.73406 -36.69245, 174.73408... | MULTIPOLYGON (((174.73406 -36.69245, 174.73408... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7291940 | Lot 745 DP 433546 | DP 433546 | Fee Simple Title | Primary | None | North Auckland | 528968 | 247.0 | 247.0 | MULTIPOLYGON (((174.93787 -37.03903, 174.93763... | MULTIPOLYGON (((174.93787 -37.03903, 174.93763... |
| 7266269 | Lot 533 DP 427884 | DP 427884, LT 454543 | Fee Simple Title | Primary | None | North Auckland | 520875 | 336.0 | 336.0 | MULTIPOLYGON (((174.93990 -37.03814, 174.94015... | MULTIPOLYGON (((174.93990 -37.03814, 174.94015... |
| 8051540 | Lot 8 DP 533517 | DP 533517 | Fee Simple Title | Primary | None | North Auckland | 876934 | 185.0 | 185.0 | MULTIPOLYGON (((174.67269 -36.59236, 174.67269... | MULTIPOLYGON (((174.67269 -36.59236, 174.67269... |
| 7264065 | Lot 518 DP 429024 | DP 429024 | Fee Simple Title | Primary | None | North Auckland | 513803 | 313.0 | 313.0 | MULTIPOLYGON (((174.93753 -37.03880, 174.93747... | MULTIPOLYGON (((174.93753 -37.03880, 174.93747... |
| 7266268 | Lot 532 DP 427884 | DP 427884, LT 454543 | Fee Simple Title | Primary | None | North Auckland | 520874 | 331.0 | 331.0 | MULTIPOLYGON (((174.94009 -37.03794, 174.94015... | MULTIPOLYGON (((174.94009 -37.03794, 174.94015... |
537290 rows × 11 columns
%%time
df = gpd.sjoin(df, parcels, how="left")
CPU times: user 38.9 s, sys: 2.21 s, total: 41.1 s Wall time: 41.3 s
df.parcel_intent.value_counts()
DCDB 276092 Fee Simple Title 99917 Legalisation 79 Road 3 Vesting on Deposit for Local Purpose Reserve 2 Vesting on Deposit for Recreation Reserve (Territorial Authority) 1 Name: parcel_intent, dtype: int64
# Roads shouldn't be in this dataset - might have to give these 3 points a little nudge
sold_roads = df[df.parcel_intent == "Road"]
for i in range(len(sold_roads)):
sold_road = sold_roads.iloc[i:i+1]
display(sold_road)
ax = sold_road.parcel_geometry.to_crs(epsg=3857).plot(alpha=.5)
sold_road.parcel_geometry.centroid.to_crs(epsg=3857).plot(ax=ax, color="red")
sold_road.to_crs(epsg=3857).plot(ax=ax, color="green")
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, zoom=21 if i == 0 else "auto")
ax.set_title("Red = parcel centroid, green = QPID latlong")
plt.show()
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 258637 | 174.84187 | -36.903922 | 2020 | POINT (174.84187 -36.90392) | 7515185.0 | Section 1 SO 471988 | SO 471988, SO 509515 | Road | Primary | [Create] Land declared Road New Zealand Gazett... | North Auckland | None | 690.0 | 688.0 | MULTIPOLYGON (((174.84229 -36.90402, 174.84226... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.59097 | 2020 | POINT (174.64266 -36.59097) | 6605265.0 | None | SO 70497 | Road | Primary | [Create] Road New Zealand Gazette 2002 p 4425 | North Auckland | None | NaN | 31611.0 | MULTIPOLYGON (((174.63916 -36.58870, 174.63934... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74900 -36.82376) | 5222196.0 | None | None | Road | Primary | None | North Auckland | None | NaN | 3462.0 | MULTIPOLYGON (((174.74895 -36.82419, 174.74897... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
df.at[258637, "parcel_intent"] = "Glitch?"
print(f"{1e-4} degrees equates to {df.loc[[1528194]].to_crs(epsg=2193).distance(df.loc[[1528194]].translate(yoff=-1e-4).to_crs(epsg=2193)).iloc[0]} meters")
0.0001 degrees equates to 11.09551261419352 meters
problems = [1528194, 1779952]
display(df.loc[problems])
# Nudge point south a little bit
df.geometry[1528194] = df.loc[[1528194]].translate(yoff=-1e-4)
# Nudge point west a little bit
df.geometry[1779952] = df.loc[[1779952]].translate(xoff=-1e-4)
# Redo the join for these newly adjusted points
subset = df.loc[df.index.isin(problems), ["CL_Longitude", "CL_Latitude", "QPID_vintage", "geometry"]]
df.loc[problems] = gpd.sjoin(subset, parcels)
display(df.loc[problems])
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.590970 | 2020 | POINT (174.64266 -36.59097) | 6605265.0 | None | SO 70497 | Road | Primary | [Create] Road New Zealand Gazette 2002 p 4425 | North Auckland | None | NaN | 31611.0 | MULTIPOLYGON (((174.63916 -36.58870, 174.63934... |
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74900 -36.82376) | 5222196.0 | None | None | Road | Primary | None | North Auckland | None | NaN | 3462.0 | MULTIPOLYGON (((174.74895 -36.82419, 174.74897... |
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.590970 | 2020 | POINT (174.64266 -36.59107) | 5201612.0 | Part Allot 74 PSH OF Waiwera | LT 549520, SO 1693/B | DCDB | Primary | None | North Auckland | NA1129/238 | 2289.0 | 2286.0 | MULTIPOLYGON (((174.64203 -36.59121, 174.64232... |
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74890 -36.82376) | 4839993.0 | Lot 1 DP 154975 | DP 154975 | DCDB | Primary | None | North Auckland | NA125D/837, NA125D/838 | 811.0 | 760.0 | MULTIPOLYGON (((174.74859 -36.82361, 174.74901... |
df.parcel_intent.value_counts()
DCDB 276094 Fee Simple Title 99917 Legalisation 79 Vesting on Deposit for Local Purpose Reserve 2 Glitch? 1 Vesting on Deposit for Recreation Reserve (Territorial Authority) 1 Name: parcel_intent, dtype: int64
df = df.rename(columns={"titles": "LINZ_parcel_ID"})
df.index_right = df.index_right.astype('Int64')
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) | 5128312 | Part Allot S25 PSH OF Arai | None | DCDB | Primary | None | North Auckland | NA589/73 | NaN | 2913.0 | MULTIPOLYGON (((174.58877 -36.18632, 174.58888... |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) | 5128309 | Lot 2 DP 130303 | DP 130303 | DCDB | Primary | None | North Auckland | NA76B/662 | 9859.0 | 8620.0 | MULTIPOLYGON (((174.58154 -36.20156, 174.58161... |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) | 4823770 | Part Otioro & Te Topuni A2A Block | ML 9928 | DCDB | Primary | None | North Auckland | NA1373/48 | 8296.0 | 7490.0 | MULTIPOLYGON (((174.49514 -36.22889, 174.49701... |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) | 4697657 | Lot 1 DP 44316 | DP 44316 | DCDB | Primary | None | North Auckland | NA1373/47 | 3667.0 | 4152.0 | MULTIPOLYGON (((174.49863 -36.22877, 174.49862... |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) | 5202126 | Lot 1 DP 52926 | DP 52926 | DCDB | Primary | None | North Auckland | NA8B/226 | 3331.0 | 3323.0 | MULTIPOLYGON (((174.49427 -36.23026, 174.49386... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) | 7974171 | Lot 82 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826394 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72812... |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) | 7974172 | Lot 83 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826395 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72814... |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) | 7440747 | Lot 2 DP 460961 | DP 460961 | Fee Simple Title | Primary | None | North Auckland | 605470 | 4500.0 | 4495.0 | MULTIPOLYGON (((174.65635 -36.51025, 174.65624... |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) | 7987560 | Lot 1 DP 533552 | DP 533552 | Fee Simple Title | Primary | None | North Auckland | 878889 | 62163.0 | 62143.0 | MULTIPOLYGON (((174.53978 -36.77485, 174.53974... |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) | 7799795 | Section 36 SO 498829 | SO 498829 | Fee Simple Title | Primary | [Create] Fee Simple Title New Zealand Gazette ... | North Auckland | 900731 | 1460.0 | 1459.0 | MULTIPOLYGON (((174.64432 -36.85494, 174.64428... |
383272 rows × 15 columns
sum(pd.isna(df.LINZ_parcel_ID))
7202
df.LINZ_parcel_ID.sample(10)
CL_QPID_output2 1656620 NA112B/92, NA92A/927 2778909 553672 2982737 708817 311957 NA33C/665 151569 NA43B/133, NA43B/134 269750 None 1585992 NA69D/640 162868 NA1B/1263 1576972 NA20A/590 2326202 121037, 123163, 126156, 126160, 126161, 126162... Name: LINZ_parcel_ID, dtype: object
b. Centroid longitude of parcel(s). LINZ_parcel_centroid_lon
c. Centroid latitude of parcel(s). LINZ_parcel_centroid_lat
df["LINZ_parcel_centroid_lon"] = df.parcel_geometry.centroid.x
df["LINZ_parcel_centroid_lat"] = df.parcel_geometry.centroid.y
<string>:1: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. <string>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
sample_parcels = parcels.cx[174.782:174.783, -36.870:-36.871]
ax = sample_parcels.to_crs(epsg=3857).plot(column="parcel_intent", legend=True, alpha=.5, categorical=True, edgecolor="black")
sample_parcels.centroid.to_crs(epsg=3857).plot(ax=ax, color="red")
df[df.index_right.isin(sample_parcels.index)].to_crs(epsg=3857).plot(ax=ax, color="green")
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
plt.title("Red = parcel centroid, green = QPID latlong")
<string>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
Text(0.5, 1.0, 'Red = parcel centroid, green = QPID latlong')
d. vector of longitudes of the vertices of the matched LINZ parcels LINZ_parcel_vertices_lon
e. vector of latitudes of the vertices of the matched LINZ parcels LINZ_parcel_vertices_lat
pd.Series(type(a) for a in df.parcel_geometry).value_counts()
<class 'shapely.geometry.multipolygon.MultiPolygon'> 376094 <class 'NoneType'> 7178 dtype: int64
%%time
def extract_verts(geometry):
lat = np.nan
lng = np.nan
if geometry:
coordinates = []
for polygon in geometry:
coordinates.extend(polygon.exterior.coords)
lng = f"[{'; '.join([str(round(point[0], 6)) for point in coordinates])}]"
lat = f"[{'; '.join([str(round(point[1], 6)) for point in coordinates])}]"
return pd.Series([lng, lat])
df[["LINZ_parcel_vertices_lon", "LINZ_parcel_vertices_lat"]] = df.parcel_geometry.progress_apply(extract_verts)
<string>:6: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
CPU times: user 1min 27s, sys: 2.29 s, total: 1min 29s Wall time: 1min 27s
f. subvector of longitudes of parcel that sits adjacent to a road LINZ_parcel_roadvertices_lon
g. subvector of latitudes of parcel that sits adjacent to a road LINZ_parcel_roadvertices_lat
Let's start with a simple test of one row.
sample = df.iloc[0]
sample
/usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/inference.py:383: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry. iter(obj) # Can iterate over it. /usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/inference.py:384: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. len(obj) # Has a length associated with it. /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:118: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry. s = iter(seq) /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:122: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. for i in range(min(nitems, len(seq))) /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:126: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. if nitems < len(seq):
CL_Longitude 174.588941 CL_Latitude -36.186076 QPID_vintage 2020 geometry POINT (174.588940530675 -36.1860763144825) index_right 5128312 appellation Part Allot S25 PSH OF Arai affected_surveys None parcel_intent DCDB topology_type Primary statutory_actions None land_district North Auckland LINZ_parcel_ID NA589/73 survey_area NaN calc_area 2913.0 parcel_geometry (POLYGON ((174.5887711830001 -36.1863244329999... LINZ_parcel_centroid_lon 174.589185 LINZ_parcel_centroid_lat -36.186039 LINZ_parcel_vertices_lon [174.588771; 174.588883; 174.589331; 174.58955... LINZ_parcel_vertices_lat [-36.186324; -36.185872; -36.18579; -36.186183... Name: 75494, dtype: object
%%time
xmin, ymin, xmax, ymax = sample.parcel_geometry.bounds
search_area = parcels.cx[xmin:xmax, ymin:ymax]
neighbours = search_area[search_area.touches(sample.parcel_geometry)]
neighbours
CPU times: user 12.2 s, sys: 501 ms, total: 12.7 s Wall time: 12.8 s
| appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| 5207551 | None | None | Road | Primary | None | North Auckland | None | NaN | 8063.0 | MULTIPOLYGON (((174.58888 -36.18587, 174.58877... | MULTIPOLYGON (((174.58888 -36.18587, 174.58877... |
| 5224880 | None | None | Road | Primary | None | North Auckland | None | NaN | 10916.0 | MULTIPOLYGON (((174.58967 -36.18646, 174.58967... | MULTIPOLYGON (((174.58967 -36.18646, 174.58967... |
| 4941664 | Lot 2 DP 130126 | DP 130126 | DCDB | Primary | None | North Auckland | NA131B/147 | 21810.0 | 21780.0 | MULTIPOLYGON (((174.59156 -36.18479, 174.59187... | MULTIPOLYGON (((174.59156 -36.18479, 174.59187... |
| 6744495 | Lot 43 DP 340819 | DP 340819, DP 477278, LT 409850 | Fee Simple Title | Primary | None | North Auckland | 661091 | 1529.0 | 1528.0 | MULTIPOLYGON (((174.58907 -36.18508, 174.58920... | MULTIPOLYGON (((174.58907 -36.18508, 174.58920... |
| 7558496 | Lot 16 DP 477278 | DP 477278, LT 494228 | Fee Simple Title | Primary | None | North Auckland | 661091 | 6043.0 | 6041.0 | MULTIPOLYGON (((174.58872 -36.18652, 174.58877... | MULTIPOLYGON (((174.58872 -36.18652, 174.58877... |
search_area.parcel_intent[sample.index_right] = "AOI"
cmap = matplotlib.colors.ListedColormap(['red', 'blue', 'green', 'pink'])
ax = search_area.to_crs(epsg=3857).plot(column="parcel_intent", cmap=cmap, legend=True, categorical=True, alpha=.5)
roads = neighbours[neighbours.parcel_intent == "Road"]
intersection = roads.geometry.intersection(sample.parcel_geometry)
intersection.to_crs(epsg=3857).plot(ax=ax, color="red", linewidth=5)
ctx.add_basemap(ax, url=ctx.providers.Esri.WorldImagery)
<string>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy <string>:7: FutureWarning: The "url" option is deprecated. Please use the "source" argument instead.
%%time
roads = parcels[parcels.parcel_intent == "Road"]
CPU times: user 11.9 s, sys: 470 ms, total: 12.4 s Wall time: 12.6 s
%%time
def extract_road_intersections(geom):
if geom:
xmin, ymin, xmax, ymax = geom.bounds
neighbouring_roads = roads.cx[xmin:xmax, ymin:ymax]
return neighbouring_roads.geometry.intersection(geom).unary_union
return np.nan
road_intersections = df.parcel_geometry.parallel_apply(extract_road_intersections)
CPU times: user 11min 8s, sys: 31.7 s, total: 11min 40s Wall time: 1h 23min 27s
types = pd.Series(type(i) for i in road_intersections)
types.index = road_intersections.index
types.value_counts()
<class 'shapely.geometry.linestring.LineString'> 199991 <class 'shapely.geometry.multilinestring.MultiLineString'> 141418 <class 'NoneType'> 40163 <class 'shapely.geometry.collection.GeometryCollection'> 1290 <class 'shapely.geometry.point.Point'> 393 <class 'shapely.geometry.multipoint.MultiPoint'> 16 <class 'shapely.geometry.polygon.Polygon'> 1 dtype: int64
display(df[types == Polygon])
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||
| 258637 | 174.84187 | -36.903922 | 2020 | POINT (174.84187 -36.90392) | 7515185 | Section 1 SO 471988 | SO 471988, SO 509515 | Glitch? | Primary | [Create] Land declared Road New Zealand Gazett... | North Auckland | None | 690.0 | 688.0 | MULTIPOLYGON (((174.84229 -36.90402, 174.84226... | 174.841943 | -36.903956 | [174.842286; 174.842257; 174.84188; 174.841667... | [-36.904017; -36.904065; -36.904015; -36.90396... |
geom = df.parcel_geometry[258637]
xmin, ymin, xmax, ymax = geom.bounds
neighbouring_roads = roads.cx[xmin:xmax, ymin:ymax]
neighbouring_roads = neighbouring_roads[neighbouring_roads.touches(geom)]
road_intersections[258637] = neighbouring_roads.geometry.intersection(geom).unary_union
types[258637] = type(road_intersections[258637])
types.value_counts()
<class 'shapely.geometry.linestring.LineString'> 199991 <class 'shapely.geometry.multilinestring.MultiLineString'> 141419 <class 'NoneType'> 40163 <class 'shapely.geometry.collection.GeometryCollection'> 1290 <class 'shapely.geometry.point.Point'> 393 <class 'shapely.geometry.multipoint.MultiPoint'> 16 dtype: int64
collections = road_intersections[types == GeometryCollection]
collection_types = []
for c in collections:
collection_types.extend(type(o) for o in c)
pd.Series(collection_types).value_counts()
<string>:3: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
<class 'shapely.geometry.linestring.LineString'> 1245 <class 'shapely.geometry.point.Point'> 572 <class 'shapely.geometry.polygon.Polygon'> 2 dtype: int64
for i, c in collections.iteritems():
if Polygon in [type(o) for o in c]:
print(i)
2211389 2232849
<string>:2: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
problems = df.loc[[2211389, 2232849]]
display(problems)
xmin, ymin, xmax, ymax = problems.parcel_geometry.unary_union.bounds
search_area = parcels.cx[xmin:xmax, ymin:ymax]
search_area.parcel_intent[problems.index_right] = "AOI"
cmap = matplotlib.colors.ListedColormap(['red', 'blue', 'green', 'pink'])
ax = search_area.to_crs(epsg=3857).plot(column="parcel_intent", cmap=cmap, legend=True, categorical=True, alpha=.5, edgecolor="black")
roads = search_area[search_area.parcel_intent == "Road"]
intersection = roads.geometry.intersection(problems.parcel_geometry.unary_union)
intersection.to_crs(epsg=3857).plot(ax=ax, color="red", linewidth=5)
intersection.boundary.to_crs(epsg=3857).plot(ax=ax, color="blue", linewidth=5)
display(intersection)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||
| 2211389 | 174.690915 | -36.764765 | 2020 | POINT (174.69092 -36.76476) | 6605986 | Lot 109 DP 310598 | DP 310598 | Fee Simple Title | Primary | None | North Auckland | 41583 | 603.0 | 603.0 | MULTIPOLYGON (((174.69072 -36.76480, 174.69098... | 174.690924 | -36.764765 | [174.69072; 174.690984; 174.691061; 174.691117... | [-36.764801; -36.764606; -36.764662; -36.76473... |
| 2232849 | 174.690710 | -36.764933 | 2020 | POINT (174.69071 -36.76493) | 6616860 | Lot 68 DP 312607 | DP 312607 | Fee Simple Title | Primary | None | North Auckland | 49611 | 502.0 | 502.0 | MULTIPOLYGON (((174.69053 -36.76494, 174.69072... | 174.690721 | -36.764940 | [174.69053; 174.69072; 174.690863; 174.690907;... | [-36.764943; -36.764801; -36.764926; -36.76496... |
<string>:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /usr/local/lib/python3.8/dist-packages/pandas/core/series.py:1135: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self._set_values(indexer, value)
id 6616875 POLYGON ((174.69072 -36.76480, 174.69053 -36.7... 6606001 MULTILINESTRING ((174.69112 -36.76474, 174.691... 6616867 MULTILINESTRING ((174.69053 -36.76494, 174.690... 6606004 POLYGON ((174.69072 -36.76480, 174.69072 -36.7... dtype: geometry
%%time
LINZ_parcel_roadvertices_lon = []
LINZ_parcel_roadvertices_lat = []
for intersection in tqdm(road_intersections):
lat = np.nan
lon = np.nan
if intersection:
lat = set()
lon = set()
try:
if type(intersection) in (LineString, Point):
lon.update(intersection.xy[0])
lat.update(intersection.xy[1])
elif type(intersection) in (MultiLineString, GeometryCollection, MultiPoint):
for p in intersection.geoms:
if type(p) == Polygon:
lon.update(p.exterior.xy[0])
lat.update(p.exterior.xy[1])
else:
lon.update(p.xy[0])
lat.update(p.xy[1])
else:
raise Exception(f"Don't know how to handle {type(intersection)}")
except:
print(type(intersection))
print(intersection)
raise
lon = f"[{'; '.join([str(round(lon, 6)) for lon in list(lon)])}]"
lat = f"[{'; '.join([str(round(lat, 6)) for lat in list(lat)])}]"
LINZ_parcel_roadvertices_lon.append(lon)
LINZ_parcel_roadvertices_lat.append(lat)
df["LINZ_parcel_roadvertices_lon"] = LINZ_parcel_roadvertices_lon
df["LINZ_parcel_roadvertices_lat"] = LINZ_parcel_roadvertices_lat
CPU times: user 39.8 s, sys: 776 ms, total: 40.6 s Wall time: 40.5 s
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | ... | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | LINZ_parcel_roadvertices_lon | LINZ_parcel_roadvertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) | 5128312 | Part Allot S25 PSH OF Arai | None | DCDB | Primary | None | ... | NA589/73 | NaN | 2913.0 | MULTIPOLYGON (((174.58877 -36.18632, 174.58888... | 174.589185 | -36.186039 | [174.588771; 174.588883; 174.589331; 174.58955... | [-36.186324; -36.185872; -36.18579; -36.186183... | [174.589331; 174.589553; 174.588771; 174.58955... | [-36.186324; -36.185872; -36.18575; -36.186183... |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) | 5128309 | Lot 2 DP 130303 | DP 130303 | DCDB | Primary | None | ... | NA76B/662 | 9859.0 | 8620.0 | MULTIPOLYGON (((174.58154 -36.20156, 174.58161... | 174.581884 | -36.200845 | [174.581541; 174.581606; 174.581586; 174.58184... | [-36.201565; -36.201229; -36.201017; -36.19972... | [174.581911; 174.581541; 174.581586; 174.58160... | [-36.201017; -36.199726; -36.201565; -36.20122... |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) | 4823770 | Part Otioro & Te Topuni A2A Block | ML 9928 | DCDB | Primary | None | ... | NA1373/48 | 8296.0 | 7490.0 | MULTIPOLYGON (((174.49514 -36.22889, 174.49701... | 174.496751 | -36.228824 | [174.495137; 174.497012; 174.497667; 174.49766... | [-36.228888; -36.228552; -36.228639; -36.22893... | [174.497012; 174.495137; 174.497667] | [-36.228552; -36.228639; -36.228888] |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) | 4697657 | Lot 1 DP 44316 | DP 44316 | DCDB | Primary | None | ... | NA1373/47 | 3667.0 | 4152.0 | MULTIPOLYGON (((174.49863 -36.22877, 174.49862... | 174.498193 | -36.228933 | [174.498626; 174.498615; 174.498386; 174.49766... | [-36.228766; -36.229337; -36.22924; -36.228936... | [174.497667; 174.498626] | [-36.228639; -36.228766] |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) | 5202126 | Lot 1 DP 52926 | DP 52926 | DCDB | Primary | None | ... | NA8B/226 | 3331.0 | 3323.0 | MULTIPOLYGON (((174.49427 -36.23026, 174.49386... | 174.494172 | -36.230010 | [174.49427; 174.493857; 174.493618; 174.493729... | [-36.230265; -36.230116; -36.230056; -36.22977... | [174.493729; 174.494901] | [-36.229771; -36.229975] |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) | 7974171 | Lot 82 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | ... | 826394 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72812... | 174.728112 | -36.408104 | [174.728128; 174.728123; 174.728076; 174.72808... | [-36.408145; -36.408144; -36.408135; -36.40810... | NaN | NaN |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) | 7974172 | Lot 83 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | ... | 826395 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72814... | 174.728164 | -36.408113 | [174.728128; 174.728137; 174.728139; 174.72814... | [-36.408145; -36.408111; -36.408106; -36.40807... | NaN | NaN |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) | 7440747 | Lot 2 DP 460961 | DP 460961 | Fee Simple Title | Primary | None | ... | 605470 | 4500.0 | 4495.0 | MULTIPOLYGON (((174.65635 -36.51025, 174.65624... | 174.655985 | -36.510014 | [174.656354; 174.656238; 174.656031; 174.65582... | [-36.510252; -36.510457; -36.510421; -36.51039... | [174.655895; 174.656017; 174.655934; 174.65615... | [-36.509415; -36.50944; -36.509474; -36.509423... |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) | 7987560 | Lot 1 DP 533552 | DP 533552 | Fee Simple Title | Primary | None | ... | 878889 | 62163.0 | 62143.0 | MULTIPOLYGON (((174.53978 -36.77485, 174.53974... | 174.538969 | -36.773813 | [174.539778; 174.539738; 174.539702; 174.53967... | [-36.774848; -36.774902; -36.774953; -36.77500... | [174.53962; 174.539652; 174.539673; 174.539702... | [-36.774902; -36.772768; -36.774953; -36.77358... |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) | 7799795 | Section 36 SO 498829 | SO 498829 | Fee Simple Title | Primary | [Create] Fee Simple Title New Zealand Gazette ... | ... | 900731 | 1460.0 | 1459.0 | MULTIPOLYGON (((174.64432 -36.85494, 174.64428... | 174.644048 | -36.854825 | [174.64432; 174.644277; 174.644156; 174.643992... | [-36.854943; -36.854953; -36.854981; -36.85501... | [174.643431; 174.643415] | [-36.854998; -36.854967] |
383272 rows × 21 columns
df.drop(columns="parcel_geometry").to_csv("restricted/QPIDs_Auckland_1990_2020_augmented.csv")